# Start from an empty workspace: remove every object, including dot-prefixed
# ones, from the current environment before anything else runs.
rm(list=ls(all=TRUE))
# metafor and clubSandwich are attached but are not called anywhere in the
# visible part of this script.
library(metafor)
library(clubSandwich)
library(dplyr)   # rename() and select() in the data preparation below
library(ggplot2) # plotting functions used by early_late_plot()

######################################
##### LIT
######################################
# Every relative path used later (both input CSVs and '../figures/...')
# resolves against this directory.
setwd("~/Dropbox/Apps/ShareLaTeX/Minimal Effects/replication/data")

# Load the literature ("LIT") spreadsheet; strings stay character, not factor.
data <- read.csv("MPSA Persuasion MetaAnalysis - Candidate Campaign.csv", stringsAsFactors = FALSE)

# Recoding fun
# Give the effect and standard-error columns short names, then multiply both
# by 100.
# NOTE(review): the source column names end in "in.pp." yet the values are
# rescaled by 100 here -- presumably they are stored as proportions; confirm.
data <- rename(data, CACE = CACE..in.pp., SE = SE..in.pp.)
data$CACE <- data$CACE * 100
data$SE <- data$SE * 100

# Exclusions, applied one after another on the clustering identifier. The
# last one drops every experiment whose identifier starts with "Green".
data <- subset(data, Experiment...for.clustering != 'Spenkuch and Toniatti (2016)') # Not really natural experiment or DID
data <- subset(data, Experiment...for.clustering != 'Huber and Arceneaux (2007)') # Not really natural experiment or DID
data <- subset(data, Experiment...for.clustering != 'Shaw and Gimpel (2012)') # Not really outreach from a campaign
data <- subset(data, substr(data$Experiment...for.clustering, 1, 5) != 'Green') # Not really outreach from a campaign
# Keep general-election rows only.
data <- subset(data, Primary.or.General == 'General')

# Factor copy of the clustering identifier; renamed to
# Experiment.for.Clustering in the select() below.
data$Experiment <- factor(data$Experiment...for.clustering)

# Personal = 1 when the treatment mode is one of the three listed, else 0.
data$Personal <- ifelse(data$Treatment.Mode %in% c("Candidate Canvass", "Canvass",
                                                   "Phone"), 1, 0)

# Difference between the two "days after" columns, then a 0/1 flag for values
# strictly below 60. A missing difference yields NA in the flag.
data$Treatment.days.before.election <- with(data, Days.after.treatment.survey.was.taken - Days.after.election.survey.was.taken)
data$Treatment.Within.60.Days <- ifelse(data$Treatment.days.before.election < 60, 1, 0)

# Keep only the columns needed downstream, renaming two of them so the names
# match the `wa` data frame built in the next section.
lit <- select(data, CACE, SE,
              Measurement.Mode = Measurement.mode,
              Experiment.for.Clustering = Experiment,
              Personal, Treatment.Mode, Treatment.Within.60.Days)
# Label format: "<experiment> - <treatment mode>".
lit$label <- as.character(lit$Experiment.for.Clustering)
lit$label <- paste0(lit$label, ' - ', lit$Treatment.Mode)
# WA = 0 on every literature row; the rows built in the next section get 1.
lit$WA <- 0

###################################
########### WA
###################################
# Load the second input file. `data` is reused, so the literature rows now
# live only in `lit`.
data <- read.csv("master_sheet_output.csv", stringsAsFactors = FALSE)
# Drop rows that have no covariate-adjusted candidate effect.
data <- subset(data, !is.na(Candidate.Effect.with.Covars))

# Replace "Canvass" with "Experiment" in the experiment name, then build the
# label as "<experiment name> - <seat> - <treatment mode>".
data$ExperimentName <- gsub("Canvass", "Experiment", data$ExperimentName)
data$label <- paste0(data$ExperimentName, ' - ', data$Seat, ' - ', data$Treatment.Mode)

# Flag is 1 for every row except those coming from the two Ohio files named
# below, which get 0.
data$Treatment.Within.60.Days <- as.numeric(data$Data != 'OH_EarlyCanvass_2016.dta' & # Canvass in May
                                              data$Data != 'OH_LateCanvass_2016.dta') # Canvass in August

# Sort rows by canvass date; later steps rely on row order.
data <- data[order(data$Date.Canvass),]
# Convert to pp: the estimate and its standard error are both multiplied by 50
# (no variance is computed here).
# NOTE(review): the factor 50 presumably reflects the scale of the outcome
# variable in the master sheet -- confirm.
data$CACE <- data$Candidate.Effect.with.Covars * 50
data$SE <- data$Candidate.Effect.SE.with.Covars * 50

# Keep general-election rows only.
data <- subset(data, Primary.or.General == 'General')

# Same column set as `lit` so the two can be row-bound. This expects the
# master sheet to provide an `Experiment` column.
wa <- select(data, CACE, SE, Experiment.for.Clustering = Experiment, label,
             Treatment.Within.60.Days, Treatment.Mode)
# Constants for every original-study row.
wa$Measurement.Mode <- 'Survey'
wa$Personal <- 1
wa$WA <- 1

# Merge
# Stack the literature rows (`lit`) on top of the original-study rows (`wa`).
data <- rbind.data.frame(lit, wa)

##Keep the 4 experiments we care about
# NOTE(review): six identifiers are matched below (a seventh is commented
# out), not four; the two Ohio identifiers are given one shared label further
# down. Confirm what "4" refers to.
# Only five columns are kept: CACE, SE, Treatment.Within.60.Days,
# Experiment.for.Clustering and label.
data <- subset(data, Experiment.for.Clustering %in% c("Doherty and Adler (2014)",
                                                      "Gerber et al. (2011b)",
                                                      "Gerber et al. (2011c)",
                                                     # "Ohio_LongShort_2016",
                                                      "Ohio_Canvass_2016", "Ohio_DiffDiff_2016",
                                                      "Wash_Special_2015"),
               select = c("CACE", "SE", "Treatment.Within.60.Days",
                          "Experiment.for.Clustering", "label"))

# Drop four specific estimates, identified by their full label.
data <- subset(data, !(data$label %in% c("Gerber et al. (2011b) - Radio",
                                         "Original Study - OH Experiment August - President - Canvass",
                                         "Original Study - OH Experiment Election Day - President - Canvass",
                                         "Original Study - OH DID - President - Canvass")))

# Overwrite the labels of the original studies. Both Ohio experiments receive
# the same label, so early_late_plot() selects them together.
data[data$Experiment.for.Clustering == "Ohio_Canvass_2016",]$label <- "Original Canvass Study - OH Senate"
data[data$Experiment.for.Clustering == "Ohio_DiffDiff_2016",]$label <- "Original Canvass Study - OH Senate"
data[data$Experiment.for.Clustering == "Wash_Special_2015",]$label  <- "Original Canvass Study - WA State Legislator"

#Manually enter subset by experiment.
# NOTE(review): these 11 values are assigned by position, so they are only
# correct if the filtered data has exactly these 11 rows in this order --
# re-check whenever an input file or a filter above changes.
data$Subset <- c("Early Treatment, \n Early Measurement", "Early Treatment, \n Late Measurement", "New Sample \n Later in Election",
                 "Early Treatment, \n Early Measurement", "Early Treatment, \n Late Measurement", "New Sample \n Later in Election",
                 "Early Treatment, \n Early Measurement", "Early Treatment, \n Late Measurement",
                 "Early Treatment, \n Early Measurement", "Early Treatment, \n Late Measurement",
                 "New Sample \n Later in Election")

#Create missing data for the follow-up WA study.
# Row 12 starts as all-NA and then gets four fields filled in. CACE = 15 is
# above the y-axis limit of 11 set in early_late_plot() and SE = 0.
# NOTE(review): presumably a placeholder that keeps the "New Sample" category
# on the x axis of the WA panel without showing a point -- confirm.
data[12,] <- NA
data[12,"CACE"] <- 15
data[12,"SE"] <- 0
data[12, "label"] <- "Original Canvass Study - WA State Legislator"
data[12, "Subset"] <- "New Sample \n Later in Election"

# Print the assembled data frame for a visual check.
data

#Make plot

# Plot the estimates of one study, one point per `Subset` category.
#
# Arguments:
#   df         - data frame with columns `label`, `Subset`, `CACE` and `SE`.
#   experiment - label of the study to plot; rows whose `label` equals it are
#                drawn, and it is also used as the plot title.
#
# Returns a ggplot object showing CACE as points with CACE +/- 1.96 * SE error
# bars, a horizontal line at zero, a dotted vertical line at x = 2.5, and a
# y axis fixed to [-6, 11].
early_late_plot <- function(df, experiment) {
  # Select rows by explicit indexing instead of subset(): subset() evaluates
  # its condition inside `df`, so a column named `experiment` would silently
  # take precedence over the argument. Rows with a missing label are dropped,
  # exactly as subset() would drop them.
  keep <- !is.na(df$label) & df$label == experiment
  study_rows <- df[keep, , drop = FALSE]

  ggplot(study_rows, aes(x = Subset, y = CACE)) +
    geom_point(size = 5) +
    geom_errorbar(width = .1,
                  aes(ymin = CACE - 1.96 * SE, ymax = CACE + 1.96 * SE),
                  colour = "blue") +
    xlab("") + ylab("Treatment Effect (CACE)") +
    # coord_cartesian() zooms without discarding data, so values outside the
    # range are simply not visible.
    coord_cartesian(ylim = c(-6, 11)) +
    # Title with the scalar `experiment` rather than the label column, which
    # holds one copy per selected row and is empty when nothing matches.
    ggtitle(experiment) +
    geom_hline(yintercept = 0, colour = "black") +
    # x = 2.5 falls between the second and third category on the discrete axis.
    geom_vline(xintercept = 2.5, colour = "red", linetype = "dotted") +
    theme_bw() +
    theme(axis.text = element_text(size = 15))
}

# Multiple plot function
#
# Draws several plots on one page using a grid layout.
#
# Arguments:
#   ...      - plot objects (e.g. ggplot objects).
#   plotlist - optional list of further plot objects, appended after `...`.
#   file     - unused; kept so existing calls that pass it keep working.
#   cols     - number of columns in the layout.
#   layout   - a matrix specifying the layout. If present, 'cols' is ignored.
#
# If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE),
# then plot 1 will go in the upper left, 2 will go in the upper right, and
# 3 will go all the way across the bottom.
#
# When no layout is given, cells are numbered column by column, so with
# cols = 2 and four plots, plot 2 is placed below plot 1.
#
# Returns invisibly: NULL when zero or several plots are given, and the result
# of print() when exactly one plot is given.
multiplot <- function(..., plotlist = NULL, file, cols = 1, layout = NULL) {
  # Attaching grid here is kept from the original so that code relying on grid
  # being on the search path after this call continues to work.
  library(grid)

  # Make a list from the ... arguments and plotlist
  plots <- c(list(...), plotlist)
  num_plots <- length(plots)

  # Nothing to draw: return before touching the graphics device. Without this
  # guard an index sequence built as 1:0 would try to print a plot that does
  # not exist.
  if (num_plots == 0L) {
    return(invisible(NULL))
  }

  # A single plot needs no layout; print it directly.
  if (num_plots == 1L) {
    return(invisible(print(plots[[1]])))
  }

  # If layout is NULL, then use 'cols' to determine layout
  if (is.null(layout)) {
    # ncol: number of columns of plots
    # nrow: number of rows needed, calculated from the number of columns
    n_rows <- ceiling(num_plots / cols)
    layout <- matrix(seq_len(cols * n_rows), ncol = cols, nrow = n_rows)
  }

  # Set up the page
  grid.newpage()
  pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))

  # Make each plot, in the correct location
  for (i in seq_len(num_plots)) {
    # Row/column positions of every layout cell assigned to plot i
    cells <- as.data.frame(which(layout == i, arr.ind = TRUE))

    print(plots[[i]], vp = viewport(layout.pos.row = cells$row,
                                    layout.pos.col = cells$col))
  }

  invisible(NULL)
}

# One panel per study, selected by its label in `data`.
# Note that `wa` is reused here: from this point on it holds a plot, not the
# data frame built earlier.
doherty <- early_late_plot(df = data,
                           experiment = "Doherty and Adler (2014) - Mail")
gerber <- early_late_plot(df = data,
                          experiment = "Gerber et al. (2011b) - TV")
wa <- early_late_plot(df = data,
                      experiment = "Original Canvass Study - WA State Legislator")
oh <- early_late_plot(df = data,
                      experiment = "Original Canvass Study - OH Senate")

# Write the four panels to a 14 x 10 PDF in a two-column grid. multiplot()
# fills its default layout column by column, so the left column holds
# doherty above oh and the right column holds gerber above wa.
pdf(file = "../figures/summary_earlylate.pdf", width = 14, height = 10)
multiplot(plotlist = list(doherty, oh, gerber, wa), cols = 2)
dev.off()
